---
title: "Replication Script"
subtitle: "The Power of Place: Rural Descriptive Representation and Policy Support"
author: "Lukas K. Alexander and Dihan Shi, Washington University in St. Louis"
output:
  html_document:
    df_print: paged
---

```{r setup, warning=FALSE}
# ============================================================
# 1) Setup: packages, constants, reusable helpers, load data, outputs
# ============================================================

# ---- packages ----
library(readxl)
library(readr)
library(writexl)

library(dplyr)
library(tidyr)
library(stringr)

library(estimatr)
library(broom)
library(purrr)

library(ggplot2)
library(patchwork)

# ---- constants ----
outcome_pattern <- "^(T|E|U|PH|In|B)_(Rural|Urban|Control)_(Support|Benefit|Area|Trust|Proximity)$"

policy_levels <- c("Transportation", "Education", "Utilities",
                   "Public Health", "Infrastructure", "Banking")

policy_recode_map <- c(
  "T"  = "Transportation",
  "E"  = "Education",
  "U"  = "Utilities",
  "PH" = "Public Health",
  "In" = "Infrastructure",
  "B"  = "Banking"
)

policy_map <- c(
  "Transportation" = "T",
  "Education"      = "E",
  "Utilities"      = "U",
  "PublicHealth"   = "PH",
  "Public Health"  = "PH",
  "Infrastructure" = "In",
  "Banking"        = "B"
)

# ---- helper: basic respondent covariates ----
add_basic_covariates <- function(dat) {
  dat %>%
    mutate(
      ParticipantID = ParticipantId,
      age  = as.numeric(Age),
      male = if_else(Sex %in% c("Male", "Man"), 1L, 0L),
      white = if_else(Race == "White", 1L, 0L),
      party7 = case_when(
        PID_D == 1 ~ 1L,   # Strong Democrat
        PID_D == 2 ~ 2L,   # Not very strong Democrat
        PID_I == 1 ~ 3L,   # Lean Democrat
        PID_I == 3 ~ 4L,   # Independent
        PID_I == 2 ~ 5L,   # Lean Republican
        PID_R == 2 ~ 6L,   # Not very strong Republican
        PID_R == 1 ~ 7L,   # Strong Republican
        TRUE ~ NA_integer_
      ),
      party_num = as.numeric(party7)
    )
}

# ---- helper: wide vignette builder (all vignettes) ----
build_vignettes_wide <- function(dat, extra_keep = character()) {
  long <- dat %>%
    pivot_longer(
      cols          = matches(outcome_pattern),
      names_to      = c("policy_abbrev", "treat_raw", "outcome_raw"),
      names_pattern = outcome_pattern,
      values_to     = "response"
    ) %>%
    filter(!is.na(response)) %>%
    mutate(
      policy_area = recode(policy_abbrev, !!!policy_recode_map),
      treat       = str_to_lower(treat_raw),
      outcome     = str_to_lower(outcome_raw)
    ) %>%
    select(
      ParticipantID, age, male, white, party7, party_num, all_of(extra_keep),
      policy_area, treat, outcome, response
    )

  long %>%
    pivot_wider(
      names_from   = outcome,
      values_from  = response,
      names_prefix = "Y_"
    )
}

# ---- helper: common outcome recodes (proximity + area bin + factors) ----
recode_vignette_outcomes <- function(wide_vig) {
  wide_vig %>%
    mutate(
      Y_support   = as.numeric(Y_support),
      Y_benefit   = as.numeric(Y_benefit),
      Y_area      = as.numeric(Y_area),
      Y_trust     = as.numeric(Y_trust),
      Y_proximity = as.numeric(Y_proximity),

      # proximity recode: 1 -> 1, 5 -> 2, 7 -> 3, 8 -> 4
      Y_proximity = case_when(
        Y_proximity == 1 ~ 1,
        Y_proximity == 5 ~ 2,
        Y_proximity == 7 ~ 3,
        Y_proximity == 8 ~ 4,
        TRUE ~ NA_real_
      ),

      # binary area DV
      Y_area_bin = if_else(Y_area > 1, 1, 0, missing = NA_real_)
    ) %>%
    mutate(
      Z = factor(treat, levels = c("control", "rural", "urban")),
      Z = relevel(Z, ref = "control"),
      policy_area = factor(policy_area, levels = policy_levels)
    )
}

# ---- helper: coefficient extraction + plotters ----
get_coefs <- function(model) {
  tidy(model, conf.int = TRUE) %>%
    filter(term %in% c("Zrural", "Zurban"))
}

make_plot <- function(coefs, title_text, ylab_text = "Treatment Effect (95% CI)") {
  ggplot(coefs, aes(x = term, y = estimate, ymin = conf.low, ymax = conf.high)) +
    geom_pointrange() +
    geom_hline(yintercept = 0, lty = 2) +
    labs(title = title_text, x = NULL, y = ylab_text) +
    scale_x_discrete(labels = c("Zrural" = "Rural state rep",
                                "Zurban" = "Urban state rep")) +
    theme_classic()
}

make_overlay_plot <- function(coefs_df, title_text, ylab_text = "Treatment Effect (95% CI)") {
  ggplot(
    coefs_df,
    aes(x = term, y = estimate, ymin = conf.low, ymax = conf.high, color = factor(run))
  ) +
    geom_pointrange(position = position_dodge(width = 0.4)) +
    geom_hline(yintercept = 0, lty = 2) +
    labs(title = title_text, x = NULL, y = ylab_text, color = "Run") +
    scale_x_discrete(labels = c("Zrural" = "Rural state rep",
                                "Zurban" = "Urban state rep")) +
    theme_classic()
}

dat <- read_excel("data/survey.xlsx") %>%
  add_basic_covariates()

ruca <- read.csv("data/RUCA-codes-2020-zipcode.csv", stringsAsFactors = FALSE)

# ---- output directory + helpers ----
results_dir <- "results"
if (!dir.exists(results_dir)) dir.create(results_dir, recursive = TRUE)

save_fig <- function(plot_obj, filename,
                     width = 10, height = 4.5, dpi = 300) {
  ggsave(
    filename = file.path(results_dir, filename),
    plot     = plot_obj,
    width    = width,
    height   = height,
    dpi      = dpi
  )
}

save_table_txt <- function(df, filename) {
  write.table(
    df,
    file      = file.path(results_dir, filename),
    sep       = "\t",
    row.names = FALSE,
    quote     = FALSE
  )
}

save_lines <- function(lines, filename) {
  writeLines(lines, con = file.path(results_dir, filename))
}

save_model_summaries <- function(models_named, filename, title = NULL) {
  stopifnot(is.list(models_named), length(models_named) > 0)
  out <- character()

  if (!is.null(title)) {
    out <- c(out, title, "")
  }

  for (nm in names(models_named)) {
    out <- c(out,
             paste0("=== ", nm, " ==="),
             capture.output(summary(models_named[[nm]])),
             "")
  }

  save_lines(out, filename)
}
```

```{r}
# ============================================================
# 2) Main analysis (all respondents): models + coefficient plots (Figures 2, 3, B1; Table B2)
# ============================================================

wide_vig <- build_vignettes_wide(dat) %>%
  recode_vignette_outcomes()

analytic <- wide_vig %>%
  filter(!is.na(Z))

mod_support <- lm_robust(
  Y_support ~ Z + male + white + party_num + age + policy_area,
  data     = analytic,
  clusters = ParticipantID,
  se_type  = "stata"
)

mod_benefit <- lm_robust(
  Y_benefit ~ Z + male + white + party_num + age + policy_area,
  data     = analytic,
  clusters = ParticipantID,
  se_type  = "stata"
)

mod_area <- lm_robust(
  Y_area_bin ~ Z + male + white + party_num + age + policy_area,
  data     = analytic,
  clusters = ParticipantID,
  se_type  = "stata"
)

mod_trust <- lm_robust(
  Y_trust ~ Z + male + white + party_num + age + policy_area,
  data     = analytic,
  clusters = ParticipantID,
  se_type  = "stata"
)

mod_prox <- lm_robust(
  Y_proximity ~ Z + male + white + party_num + age + policy_area,
  data     = analytic,
  clusters = ParticipantID,
  se_type  = "stata"
)

# Table B2
models_b2 <- list(
  mod_support = mod_support,
  mod_benefit = mod_benefit,
  mod_area    = mod_area,
  mod_trust   = mod_trust,
  mod_prox    = mod_prox
)
save_model_summaries(models_b2, "Table_B2.txt",
                     title = "Table B2: Main analysis (all respondents) model summaries")

summary(mod_support)
summary(mod_benefit)
summary(mod_area)
summary(mod_trust)
summary(mod_prox)

coefs_support <- get_coefs(mod_support)
coefs_benefit <- get_coefs(mod_benefit)
coefs_area    <- get_coefs(mod_area)
coefs_trust   <- get_coefs(mod_trust)
coefs_prox    <- get_coefs(mod_prox)

p_support <- make_plot(coefs_support, "DV: Support for law")
p_benefit <- make_plot(coefs_benefit, "DV: Rural people benefit from law")
p_area    <- make_plot(coefs_area,    "DV: Law primarily benefits rural areas")
p_trust   <- make_plot(coefs_trust,   "DV: Trust in representative")
p_prox    <- make_plot(coefs_prox,    "DV: Representative is similar to respondent")

# Figure 2
fig2 <- p_support + p_benefit
fig2
save_fig(fig2, "Figure_2.png", width = 10, height = 4.5)

# Figure 3
fig3 <- p_area + p_trust
fig3
save_fig(fig3, "Figure_3.png", width = 10, height = 4.5)

# Figure B1
figB1 <- p_prox
figB1
save_fig(figB1, "Figure_B1.png", width = 6.5, height = 4.5)
```

```{r}
# ============================================================
# 3) Post hoc (observed) power analysis (Table C1; estimation can take up to 1hr)
# ============================================================

posthoc_power_cluster <- function(model, data, dv,
                                  clusters = "ParticipantID",
                                  sims = 2000, alpha = 0.05,
                                  sample_N = NULL, seed = 63130) {

  set.seed(seed)

  stopifnot(dv %in% names(data))

  mf <- model.frame(model)
  keep_vars <- unique(c(dv, names(mf), clusters))
  d0 <- data %>%
    select(all_of(keep_vars)) %>%
    filter(!is.na(.data[[dv]]), !is.na(.data[[clusters]]))

  if (!is.null(sample_N)) {
    cl_ids <- unique(d0[[clusters]])
    stopifnot(sample_N <= length(cl_ids))
    sampled_ids <- sample(cl_ids, sample_N, replace = FALSE)
    d0 <- d0 %>% filter(.data[[clusters]] %in% sampled_ids)
  }

  d0$mu_hat <- predict(model, newdata = d0)
  d0$e_hat  <- d0[[dv]] - d0$mu_hat

  resid_list <- split(d0$e_hat, d0[[clusters]])
  cl_ids     <- names(resid_list)

  one_draw <- function() {
    boot_ids <- sample(cl_ids, length(cl_ids), replace = TRUE)

    d_boot <- map2_dfr(
      boot_ids, seq_along(boot_ids),
      function(cid, j) {
        block <- d0 %>% filter(.data[[clusters]] == cid)
        e_star <- sample(resid_list[[cid]], nrow(block), replace = TRUE)
        block$Y_star <- block$mu_hat + e_star
        block[[clusters]] <- paste0("boot_", j)
        block
      }
    )
    d_boot
  }

  fml <- formula(model)

  out <- replicate(sims, {
    d_star <- one_draw()

    d_star[[dv]] <- d_star$Y_star

    m_star <- lm_robust(
      fml,
      data     = d_star,
      clusters = d_star[[clusters]],
      se_type  = "stata"
    )

    td <- tidy(m_star)
    p_rural <- td$p.value[td$term == "Zrural"]
    p_urban <- td$p.value[td$term == "Zurban"]

    c(sig_rural = as.integer(!is.na(p_rural) && p_rural < alpha),
      sig_urban = as.integer(!is.na(p_urban) && p_urban < alpha))
  })

  tibble(
    dv = dv,
    N_clusters = length(unique(d0[[clusters]])),
    sims = sims,
    alpha = alpha,
    power_rural_vs_control = mean(out["sig_rural", ], na.rm = TRUE),
    power_urban_vs_control = mean(out["sig_urban", ], na.rm = TRUE)
  )
}

power_support  <- posthoc_power_cluster(mod_support, analytic,  "Y_support")
power_benefit  <- posthoc_power_cluster(mod_benefit, analytic,  "Y_benefit")
power_area     <- posthoc_power_cluster(mod_area,    analytic,  "Y_area_bin")
power_trust    <- posthoc_power_cluster(mod_trust,   analytic,  "Y_trust")
power_prox     <- posthoc_power_cluster(mod_prox,    analytic,  "Y_proximity")

# Table C1
tbl_c1 <- bind_rows(power_support, power_benefit, power_area, power_trust, power_prox)
tbl_c1
save_table_txt(tbl_c1, "Table_C1.txt")
```

```{r}
# ============================================================
# 4) RUCA-rural subset analysis: counts + models + plots (Table B3)
# ============================================================

ruca <- ruca %>%
  mutate(zip_chr = str_pad(ZIPCode, width = 5, pad = "0"))

dat <- dat %>%
  mutate(zip_chr = str_pad(as.character(ZIP), width = 5, pad = "0")) %>%
  left_join(ruca %>% select(zip_chr, PrimaryRUCA), by = "zip_chr") %>%
  mutate(
    rural_RUCA = if_else(
      !is.na(PrimaryRUCA) & PrimaryRUCA >= 7 & PrimaryRUCA <= 10,
      1L, 0L
    )
  )

wide_vig <- build_vignettes_wide(dat, extra_keep = c("rural_RUCA")) %>%
  recode_vignette_outcomes()

analytic <- wide_vig %>%
  filter(!is.na(Z), rural_RUCA == 1)

mod_support <- lm_robust(
  Y_support ~ Z + male + white + party_num + age + policy_area,
  data     = analytic,
  clusters = ParticipantID,
  se_type  = "stata"
)

mod_benefit <- lm_robust(
  Y_benefit ~ Z + male + white + party_num + age + policy_area,
  data     = analytic,
  clusters = ParticipantID,
  se_type  = "stata"
)

mod_area <- lm_robust(
  Y_area_bin ~ Z + male + white + party_num + age + policy_area,
  data     = analytic,
  clusters = ParticipantID,
  se_type  = "stata"
)

mod_trust <- lm_robust(
  Y_trust ~ Z + male + white + party_num + age + policy_area,
  data     = analytic,
  clusters = ParticipantID,
  se_type  = "stata"
)

mod_prox <- lm_robust(
  Y_proximity ~ Z + male + white + party_num + age + policy_area,
  data     = analytic,
  clusters = ParticipantID,
  se_type  = "stata"
)

# Table B3
models_b3 <- list(
  mod_support = mod_support,
  mod_benefit = mod_benefit,
  mod_area    = mod_area,
  mod_trust   = mod_trust,
  mod_prox    = mod_prox
)
save_model_summaries(models_b3, "Table_B3.txt",
                     title = "Table B3: RUCA-rural subset model summaries")

summary(mod_support)
summary(mod_benefit)
summary(mod_area)
summary(mod_trust)
summary(mod_prox)

p_support <- make_plot(get_coefs(mod_support), "DV: Support for law\n(RUCA-rural)")
p_benefit <- make_plot(get_coefs(mod_benefit), "DV: Rural people benefit from law\n(RUCA-rural)")
p_area    <- make_plot(get_coefs(mod_area),    "DV: Law primarily benefits rural areas\n(RUCA-rural)")
p_trust   <- make_plot(get_coefs(mod_trust),   "DV: Trust in representative\n(RUCA-rural)")
p_prox    <- make_plot(get_coefs(mod_prox),    "DV: Representative is similar to respondent (RUCA-rural)")

p_support + p_benefit
p_area + p_trust
p_prox
```

```{r}
# ============================================================
# 5) Replicate main analysis using ONLY runs 1–2 (Table B4)
# ============================================================

first_two <- dat %>%
  mutate(
    pair1 = str_split_fixed(Policy_1, "\\|", 3)[,1],
    pair2 = str_split_fixed(Policy_1, "\\|", 3)[,2]
  ) %>%
  select(ParticipantID, age, male, white, party7, party_num, pair1, pair2)

first_two_long <- first_two %>%
  pivot_longer(
    cols = c(pair1, pair2),
    names_to = "run_label",
    values_to = "pair"
  ) %>%
  mutate(
    run = if_else(run_label == "pair1", 1L, 2L),

    policy = str_split_fixed(pair, "-", 2)[,1],
    treat_cap = str_split_fixed(pair, "-", 2)[,2],
    treat_lc  = str_to_lower(treat_cap),

    policy_abbrev = unname(policy_map[policy])
  ) %>%
  filter(!is.na(policy_abbrev), !is.na(treat_cap))

first_two_long <- first_two_long %>%
  rowwise() %>%
  mutate(
    col_support   = paste0(policy_abbrev, "_", treat_cap, "_Support"),
    col_benefit   = paste0(policy_abbrev, "_", treat_cap, "_Benefit"),
    col_area      = paste0(policy_abbrev, "_", treat_cap, "_Area"),
    col_trust     = paste0(policy_abbrev, "_", treat_cap, "_Trust"),
    col_proximity = paste0(policy_abbrev, "_", treat_cap, "_Proximity"),

    Y_support   = as.numeric(dat[[col_support]][match(ParticipantID, dat$ParticipantID)]),
    Y_benefit   = as.numeric(dat[[col_benefit]][match(ParticipantID, dat$ParticipantID)]),
    Y_area      = as.numeric(dat[[col_area]][match(ParticipantID, dat$ParticipantID)]),
    Y_trust     = as.numeric(dat[[col_trust]][match(ParticipantID, dat$ParticipantID)]),
    Y_proximity = as.numeric(dat[[col_proximity]][match(ParticipantID, dat$ParticipantID)])
  ) %>%
  ungroup()

first_two_long <- first_two_long %>%
  mutate(
    Y_proximity = case_when(
      Y_proximity == 1 ~ 1,
      Y_proximity == 5 ~ 2,
      Y_proximity == 7 ~ 3,
      Y_proximity == 8 ~ 4,
      TRUE ~ NA_real_
    ),
    Y_area_bin = if_else(Y_area > 1, 1, 0, missing = NA_real_),

    Z = factor(treat_lc, levels = c("control", "rural", "urban")),
    Z = relevel(Z, ref = "control"),

    policy_area = factor(
      policy,
      levels = c("Transportation", "Education", "Utilities",
                 "PublicHealth", "Public Health", "Infrastructure", "Banking")
    )
  )

mod_support_12 <- lm_robust(
  Y_support ~ Z + male + white + party_num + age + policy_area,
  data     = first_two_long,
  clusters = ParticipantID,
  se_type  = "stata"
)

mod_benefit_12 <- lm_robust(
  Y_benefit ~ Z + male + white + party_num + age + policy_area,
  data     = first_two_long,
  clusters = ParticipantID,
  se_type  = "stata"
)

mod_area_12 <- lm_robust(
  Y_area_bin ~ Z + male + white + party_num + age + policy_area,
  data     = first_two_long,
  clusters = ParticipantID,
  se_type  = "stata"
)

mod_trust_12 <- lm_robust(
  Y_trust ~ Z + male + white + party_num + age + policy_area,
  data     = first_two_long,
  clusters = ParticipantID,
  se_type  = "stata"
)

mod_prox_12 <- lm_robust(
  Y_proximity ~ Z + male + white + party_num + age + policy_area,
  data     = first_two_long,
  clusters = ParticipantID,
  se_type  = "stata"
)

# Table B4
models_b4 <- list(
  mod_support_12 = mod_support_12,
  mod_benefit_12 = mod_benefit_12,
  mod_area_12    = mod_area_12,
  mod_trust_12   = mod_trust_12,
  mod_prox_12    = mod_prox_12
)
save_model_summaries(models_b4, "Table_B4.txt",
                     title = "Table B4: Main analysis using runs 1–2 only (model summaries)")

summary(mod_support_12)
summary(mod_benefit_12)
summary(mod_area_12)
summary(mod_trust_12)
summary(mod_prox_12)

p_support_12 <- make_plot(get_coefs(mod_support_12), "Runs 1–2 only: Support for law")
p_benefit_12 <- make_plot(get_coefs(mod_benefit_12), "Runs 1–2 only: Rural people benefit")
p_area_12    <- make_plot(get_coefs(mod_area_12),    "Runs 1–2 only: Law benefits rural areas")
p_trust_12   <- make_plot(get_coefs(mod_trust_12),   "Runs 1–2 only: Trust in representative")
p_prox_12    <- make_plot(get_coefs(mod_prox_12),    "Runs 1–2 only: Similar to respondent")

p_support_12 + p_benefit_12
p_area_12 + p_trust_12
p_prox_12
```

```{r}
# ============================================================
# 6) By-run analysis (runs 1–6): overlay plots (Figures B2, B3, B4)
# ============================================================

run_map <- dat %>%
  mutate(
    p1_1 = str_split_fixed(Policy_1, "\\|", 3)[, 1],
    p1_2 = str_split_fixed(Policy_1, "\\|", 3)[, 2],
    p1_3 = str_split_fixed(Policy_1, "\\|", 3)[, 3],
    p2_1 = str_split_fixed(Policy_2, "\\|", 3)[, 1],
    p2_2 = str_split_fixed(Policy_2, "\\|", 3)[, 2],
    p2_3 = str_split_fixed(Policy_2, "\\|", 3)[, 3]
  ) %>%
  select(ParticipantID, p1_1, p1_2, p1_3, p2_1, p2_2, p2_3) %>%
  pivot_longer(
    cols = matches("^p[12]_[123]$"),
    names_to = "slot",
    values_to = "pair"
  ) %>%
  filter(!is.na(pair), pair != "") %>%
  mutate(
    run = case_when(
      slot == "p1_1" ~ 1L,
      slot == "p1_2" ~ 2L,
      slot == "p1_3" ~ 3L,
      slot == "p2_1" ~ 4L,
      slot == "p2_2" ~ 5L,
      slot == "p2_3" ~ 6L
    ),
    policy_name   = str_split_fixed(pair, "-", 2)[, 1],
    treat_cap     = str_split_fixed(pair, "-", 2)[, 2],
    policy_abbrev = unname(policy_map[policy_name]),
    treat_lc      = str_to_lower(treat_cap)
  ) %>%
  select(ParticipantID, run, policy_abbrev, treat_lc)

long <- dat %>%
  pivot_longer(
    cols          = matches(outcome_pattern),
    names_to      = c("policy_abbrev", "treat_cap", "outcome_raw"),
    names_pattern = outcome_pattern,
    values_to     = "response"
  ) %>%
  filter(!is.na(response)) %>%
  mutate(
    treat_lc = str_to_lower(treat_cap),
    outcome  = str_to_lower(outcome_raw)
  ) %>%
  mutate(
    age       = dat$age[match(ParticipantID, dat$ParticipantID)],
    male      = dat$male[match(ParticipantID, dat$ParticipantID)],
    white     = dat$white[match(ParticipantID, dat$ParticipantID)],
    party_num = dat$party_num[match(ParticipantID, dat$ParticipantID)]
  ) %>%
  left_join(run_map, by = c("ParticipantID", "policy_abbrev", "treat_lc"))

wide_vig <- long %>%
  select(ParticipantID, run, age, male, white, party_num,
         policy_abbrev, treat_lc, outcome, response) %>%
  pivot_wider(
    names_from   = outcome,
    values_from  = response,
    names_prefix = "Y_"
  ) %>%
  mutate(
    policy_area = recode(policy_abbrev, !!!policy_recode_map),
    Y_support   = as.numeric(Y_support),
    Y_benefit   = as.numeric(Y_benefit),
    Y_area      = as.numeric(Y_area),
    Y_trust     = as.numeric(Y_trust),
    Y_proximity = as.numeric(Y_proximity),
    Y_proximity = case_when(
      Y_proximity == 1 ~ 1,
      Y_proximity == 5 ~ 2,
      Y_proximity == 7 ~ 3,
      Y_proximity == 8 ~ 4,
      TRUE ~ NA_real_
    ),
    Y_area_bin = if_else(Y_area > 1, 1, 0, missing = NA_real_),
    Z = factor(treat_lc, levels = c("control", "rural", "urban")),
    Z = relevel(Z, ref = "control"),
    policy_area = factor(policy_area, levels = policy_levels)
  )

analytic <- wide_vig %>%
  filter(!is.na(run), !is.na(Z))

fit_by_run <- function(dv_var, data) {
  res_list <- lapply(1:6, function(r) {
    df_r <- data %>% filter(run == r)
    if (nrow(df_r) == 0) return(NULL)

    f <- as.formula(paste(dv_var, "~ Z + male + white + party_num + age + policy_area"))
    m <- lm_robust(
      f,
      data     = df_r,
      clusters = ParticipantID,
      se_type  = "stata"
    )

    tidy(m, conf.int = TRUE) %>%
      filter(term %in% c("Zrural", "Zurban")) %>%
      mutate(run = r, dv = dv_var)
  })

  bind_rows(res_list)
}

coefs_support   <- fit_by_run("Y_support",   analytic)
coefs_benefit   <- fit_by_run("Y_benefit",   analytic)
coefs_area      <- fit_by_run("Y_area_bin",  analytic)
coefs_trust     <- fit_by_run("Y_trust",     analytic)
coefs_proximity <- fit_by_run("Y_proximity", analytic)

p_support   <- make_overlay_plot(coefs_support,   "Support for law (by run)")
p_benefit   <- make_overlay_plot(coefs_benefit,   "Rural people benefit from law (by run)")
p_area      <- make_overlay_plot(coefs_area,      "Law primarily benefits rural areas (by run)")
p_trust     <- make_overlay_plot(coefs_trust,     "Trust in representative (by run)")
p_proximity <- make_overlay_plot(coefs_proximity, "Representative similar to respondent (by run)")

# Figure B2
figB2 <- p_support + p_benefit
figB2
save_fig(figB2, "Figure_B2.png", width = 10, height = 4.5)

# Figure B3
figB3 <- p_area + p_trust
figB3
save_fig(figB3, "Figure_B3.png", width = 10, height = 4.5)

# Figure B4
figB4 <- p_proximity
figB4
save_fig(figB4, "Figure_B4.png", width = 6.5, height = 4.5)
```

```{r}
# ============================================================
# 7) Z × rural consciousness interaction: models (Table B5)
# ============================================================

dat <- dat %>%
  mutate(
    rural_consc_raw = (as.numeric(rur_con_1) - 1) +
                      (as.numeric(rur_con_2) - 1) +
                      (as.numeric(rur_con_3) - 1)
  )

rc_mean <- mean(dat$rural_consc_raw, na.rm = TRUE)

dat <- dat %>%
  mutate(rural_consc = rural_consc_raw - rc_mean)

wide_vig <- build_vignettes_wide(dat, extra_keep = c("rural_consc")) %>%
  recode_vignette_outcomes() %>%
  mutate(rural_consc = as.numeric(rural_consc))

analytic <- wide_vig %>%
  filter(!is.na(Z), !is.na(rural_consc))

mod_support <- lm_robust(
  Y_support ~ Z * rural_consc + male + white + party_num + age + policy_area,
  data     = analytic,
  clusters = ParticipantID,
  se_type  = "stata"
)

mod_benefit <- lm_robust(
  Y_benefit ~ Z * rural_consc + male + white + party_num + age + policy_area,
  data     = analytic,
  clusters = ParticipantID,
  se_type  = "stata"
)

mod_area <- lm_robust(
  Y_area_bin ~ Z * rural_consc + male + white + party_num + age + policy_area,
  data     = analytic,
  clusters = ParticipantID,
  se_type  = "stata"
)

mod_trust <- lm_robust(
  Y_trust ~ Z * rural_consc + male + white + party_num + age + policy_area,
  data     = analytic,
  clusters = ParticipantID,
  se_type  = "stata"
)

mod_prox <- lm_robust(
  Y_proximity ~ Z * rural_consc + male + white + party_num + age + policy_area,
  data     = analytic,
  clusters = ParticipantID,
  se_type  = "stata"
)

# Table B5
models_b5 <- list(
  mod_support = mod_support,
  mod_benefit = mod_benefit,
  mod_area    = mod_area,
  mod_trust   = mod_trust,
  mod_prox    = mod_prox
)
save_model_summaries(models_b5, "Table_B5.txt",
                     title = "Table B5: Z × rural consciousness interaction (model summaries)")

summary(mod_support)
summary(mod_benefit)
summary(mod_area)
summary(mod_trust)
summary(mod_prox)
```
